{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "29cd5cc1",
   "metadata": {},
   "source": [
    "### Parse Open Targets target information\n",
    "\n",
    "* Data source: Open Targets (OT) release 22.11"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "1a0d35f4",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "from pathlib import Path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "f8e175af",
   "metadata": {},
   "outputs": [],
   "source": [
    "wkdir = \"/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/\"\n",
    "wkdir_path = Path(wkdir)\n",
    "open_targets_dir = wkdir_path.joinpath(\"reference/opentargets/targets\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "d66f356d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00070-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00073-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00045-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00056-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00083-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00166-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00095-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00084-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00008-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00130-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00028-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00157-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00160-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00041-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00150-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00106-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00149-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00111-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00081-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00023-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00044-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00043-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00102-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00138-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00017-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00029-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00037-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00100-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00127-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00113-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00178-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00065-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00000-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00018-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00109-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00076-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00144-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00199-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00088-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00061-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00172-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00155-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00196-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00195-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00069-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00090-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00187-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00066-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00158-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00132-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00129-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00015-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00011-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00019-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00147-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00050-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00181-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00046-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00051-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00182-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00164-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00141-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00064-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00175-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00190-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00152-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00143-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00054-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00072-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00014-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00098-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00034-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00068-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00052-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00105-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00133-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00169-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00117-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00145-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00012-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00078-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00062-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00179-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00153-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00059-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00142-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00119-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00131-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00058-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00104-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00099-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00173-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00136-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00122-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00071-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00042-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00089-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00074-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00047-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00009-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00118-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00183-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00063-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00053-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00033-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00151-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00094-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00067-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00057-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00039-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00060-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00170-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00159-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00091-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00026-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00163-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00137-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00112-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00107-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00162-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00121-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00180-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00087-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00093-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00086-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00191-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00036-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00092-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00193-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00079-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00185-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00024-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00171-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00184-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00115-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00096-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00004-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00186-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00189-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00030-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00005-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00135-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00035-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00194-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00146-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00002-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00013-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00114-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00168-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00027-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00038-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00082-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00177-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00176-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00192-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00020-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00025-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00156-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00001-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00154-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00161-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00016-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00101-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00126-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00174-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00007-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00097-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00010-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00148-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00040-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00125-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00124-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00077-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00134-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00188-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00022-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00048-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00085-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00110-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00055-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00049-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00165-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00120-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00140-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00128-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00197-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00006-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00021-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00116-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00003-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00139-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00075-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00198-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00080-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00031-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00123-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00108-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00103-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00032-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n",
      "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/reference/opentargets/targets/part-00167-55bc3d37-90ad-48ea-a5a2-de8d3b646beb-c000.json\n"
     ]
    }
   ],
   "source": [
    "# Collect the id of every target flagged as having an approved drug.\n",
    "# Each part file is read line by line, one JSON record per line.\n",
    "# sorted() fixes the processing order, so the order of the result no longer\n",
    "# depends on the order in which glob() happens to list the files.\n",
    "open_targets_paths = sorted(open_targets_dir.glob(\"*.json\"))\n",
    "approved_drug_targets = []\n",
    "for path in open_targets_paths:\n",
    "    print(path)\n",
    "    with open(path, 'r', encoding=\"utf-8\") as json_data:\n",
    "        for line in json_data:\n",
    "            open_targets_json = json.loads(line)\n",
    "            gene_id = open_targets_json[\"id\"]\n",
    "            # tractability may be missing (or null) for a target\n",
    "            tractability = open_targets_json.get(\"tractability\") or []\n",
    "            # A record can hold several 'Approved Drug' entries (presumably one\n",
    "            # per modality -- confirm against the Open Targets schema); any()\n",
    "            # appends the target once instead of once per matching entry.\n",
    "            if any(entry[\"id\"] == 'Approved Drug' and entry[\"value\"] for entry in tractability):\n",
    "                approved_drug_targets.append(gene_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "46a9e0d7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of approved drug targets: 929\n"
     ]
    }
   ],
   "source": [
    "# Report how many target ids were collected\n",
    "n_approved_targets = len(approved_drug_targets)\n",
    "print(f\"Number of approved drug targets: {n_approved_targets}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "2b9a1857",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the collected target ids, one per line, into the Open Targets directory\n",
    "out_path = open_targets_dir / \"open_targets_approved_drugs.txt\"\n",
    "with out_path.open('w') as out_handle:\n",
    "    out_handle.writelines(f\"{gene_id}\\n\" for gene_id in approved_drug_targets)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e6e3e46b",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
